name: Conformance External Workloads (ci-external-workloads)

# Any change in triggers needs to be reflected in the concurrency group.
on:
  workflow_dispatch:
    inputs:
      PR-number:
        description: "Pull request number."
        required: true
      context-ref:
        description: "Context in which the workflow runs. If PR is from a fork, will be the PR target branch (general case). If PR is NOT from a fork, will be the PR branch itself (this allows committers to test changes to workflows directly from PRs)."
        required: true
      SHA:
        description: "SHA under test (head of the PR branch)."
        required: true
      extra-args:
        description: "[JSON object] Arbitrary arguments passed from the trigger comment via regex capture group. Parse with 'fromJson(inputs.extra-args).argName' in workflow."
        required: false
        default: '{}'
  # Run every 6 hours
  schedule:
    - cron:  '0 4/6 * * *'

# By specifying the access of one of the scopes, all of those that are not
# specified are set to 'none'.
permissions:
  # To be able to access the repository with actions/checkout
  contents: read
  # To allow retrieving information from the PR API
  pull-requests: read
  # To be able to set commit status
  statuses: write
  # To be able to request the JWT from GitHub's OIDC provider
  id-token: write

concurrency:
  # Structure:
  # - Workflow name
  # - Event type
  # - A unique identifier depending on event type:
  #   - schedule: SHA
  #   - workflow_dispatch: PR number
  #
  # This structure ensures a unique concurrency group name is generated for each
  # type of testing, such that re-runs will cancel the previous run.
  group: |
    ${{ github.workflow }}
    ${{ github.event_name }}
    ${{
      (github.event_name == 'schedule' && github.sha) ||
      (github.event_name == 'workflow_dispatch' && github.event.inputs.PR-number)
    }}
  cancel-in-progress: true

env:
  clusterName: ${{ github.repository_owner }}-${{ github.event.repository.name }}-${{ github.run_id }}-${{ github.run_attempt }}-vm
  vmName: ${{ github.repository_owner }}-${{ github.event.repository.name }}-${{ github.run_id }}-${{ github.run_attempt }}-vm
  vmStartupScript: .github/gcp-vm-startup.sh
  cilium_cli_ci_version:
  CILIUM_CLI_MODE: helm
  check_url: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}
  USE_GKE_GCLOUD_AUTH_PLUGIN: True

jobs:
  commit-status-start:
    name: Commit Status Start
    runs-on: ubuntu-latest
    steps:
      - name: Set initial commit status
        uses: myrotvorets/set-commit-status-action@38f3f27c7d52fb381273e95542f07f0fba301307 # v2.0.0  
        with:
          sha: ${{ inputs.SHA || github.sha }}

  generate-matrix:
    name: Generate Matrix
    runs-on: ubuntu-latest
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
    steps:
      - name: Checkout context ref (trusted)
        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
        with:
          ref: ${{ inputs.context-ref || github.sha }}
          persist-credentials: false

      - name: Convert YAML to JSON
        run: |
          work_dir=".github/actions/gke"
          destination_directory="/tmp/generated/gke"
          mkdir -p "${destination_directory}"

          yq -o=json ${work_dir}/k8s-versions.yaml | jq . > "${destination_directory}/gke.json"

      - name: Generate Matrix
        id: set-matrix
        run: |
          cd /tmp/generated/gke

          # Use complete matrix in case of scheduled run
          # main -> event_name = schedule
          # other stable branches -> PR-number starting with v (e.g. v1.14)
          if [[ "${{ github.event_name }}" == "schedule" || "${{ inputs.PR-number }}" == v* ]];then
            jq '{ "include": [ .k8s[] ] }' gke.json > /tmp/matrix.json
          else
            jq '{ "include": [ .k8s[] | select(.default) ] }' gke.json > /tmp/matrix.json
          fi

          echo "Generated matrix:"
          cat /tmp/matrix.json
          echo "matrix=$(jq -c . < /tmp/matrix.json)" >> $GITHUB_OUTPUT

  installation-and-connectivity:
    name: Installation and Connectivity Test
    needs: generate-matrix
    runs-on: ubuntu-latest
    timeout-minutes: 45
    env:
      job_name: "Installation and Connectivity Test"
      preemptible: ${{ github.event_name != 'schedule' && '--preemptible' || '' }}
    strategy:
      fail-fast: false
      matrix: ${{fromJson(needs.generate-matrix.outputs.matrix)}}

    steps:
      - name: Checkout context ref (trusted)
        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
        with:
          ref: ${{ inputs.context-ref || github.sha }}
          persist-credentials: false

      - name: Set Environment Variables
        uses: ./.github/actions/set-env-variables

      - name: Get Cilium's default values
        id: default_vars
        uses: ./.github/actions/helm-default
        with:
          image-tag: ${{ inputs.SHA }}
          chart-dir: ./untrusted/install/kubernetes/cilium

      - name: Set up job variables
        id: vars
        run: |
          if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
            OWNER="${{ inputs.PR-number }}"
          else
            OWNER="${{ github.ref_name }}"
            OWNER="${OWNER/./-}"
          fi

          CILIUM_INSTALL_DEFAULTS="${{ steps.default_vars.outputs.cilium_install_defaults }} \
            --cluster-name=${{ env.clusterName }} \
            --datapath-mode=tunnel \
            --helm-set kubeProxyReplacement=true"
          CONNECTIVITY_TEST_DEFAULTS="--flow-validation=disabled --hubble=false --collect-sysdump-on-failure \
            --external-target google.com --external-cidr 8.0.0.0/8 --external-ip 8.8.4.4 --external-other-ip 8.8.8.8"
          # Explicitly specify LoadBalancer service type since the default type is NodePort in Helm mode.
          # Ref: https://github.com/cilium/cilium-cli/pull/1527#discussion_r1177244379
          #
          # In Helm mode, externalWorkloads.enabled is set to false by default. You need to pass
          # --enable-external-workloads flag to enable it.
          # Ref: https://github.com/cilium/cilium/pull/25259
          CLUSTERMESH_ENABLE_DEFAULTS="--service-type LoadBalancer --enable-external-workloads"
          echo cilium_install_defaults=${CILIUM_INSTALL_DEFAULTS} >> $GITHUB_OUTPUT
          echo connectivity_test_defaults=${CONNECTIVITY_TEST_DEFAULTS} >> $GITHUB_OUTPUT
          echo clustermesh_enable_defaults=${CLUSTERMESH_ENABLE_DEFAULTS} >> $GITHUB_OUTPUT
          echo sha=${{ steps.default_vars.outputs.sha }} >> $GITHUB_OUTPUT
          echo owner=${OWNER} >> $GITHUB_OUTPUT

      - name: Install Cilium CLI
        uses: cilium/cilium-cli@7306e3cdc6caee738157f08e3e1ba26179f104e5 # v0.15.23
        with:
          repository: ${{ env.CILIUM_CLI_RELEASE_REPO }}
          release-version: ${{ env.CILIUM_CLI_VERSION }}
          ci-version: ${{ env.cilium_cli_ci_version }}

      - name: Set up gcloud credentials
        id: 'auth'
        uses: google-github-actions/auth@a6e2e39c0a0331da29f7fd2c2a20a427e8d3ad1f # v2.1.1
        with:
          workload_identity_provider: ${{ secrets.GCP_PR_WORKLOAD_IDENTITY_PROVIDER }}
          service_account: ${{ secrets.GCP_PR_SA }}
          create_credentials_file: true
          export_environment_variables: true

      - name: Set up gcloud CLI
        uses: google-github-actions/setup-gcloud@98ddc00a17442e89a24bbf282954a3b65ce6d200 # v2.1.0
        with:
          project_id: ${{ secrets.GCP_PROJECT_ID }}
          version: "405.0.0"

      - name: Install gke-gcloud-auth-plugin
        run: |
          gcloud components install gke-gcloud-auth-plugin

      - name: Display gcloud CLI info
        run: |
          gcloud info

      - name: Create GCP VM
        uses: nick-invision/retry@7152eba30c6575329ac0576536151aca5a72780e # v3.0.0
        with:
          retry_on: error
          timeout_minutes: 1
          max_attempts: 10
          command: |
            gcloud compute instances create ${{ env.vmName }}-${{ matrix.vmIndex }} \
              --labels "usage=${{ github.repository_owner }}-${{ github.event.repository.name }},owner=${{ steps.vars.outputs.owner }}" \
              --zone ${{ matrix.zone }} \
              --machine-type e2-custom-2-4096 \
              --boot-disk-type pd-standard \
              --boot-disk-size 10GB \
              ${{ env.preemptible }} \
              --image-project ubuntu-os-cloud \
              --image-family ubuntu-2004-lts \
              --metadata hostname=${{ env.vmName }}-${{ matrix.vmIndex }} \
              --metadata-from-file startup-script=${{ env.vmStartupScript}}

      - name: Create GKE cluster
        run: |
          gcloud container clusters create ${{ env.clusterName }} \
            --labels "usage=${{ github.repository_owner }}-${{ github.event.repository.name }},owner=${{ steps.vars.outputs.owner }}" \
            --zone ${{ matrix.zone }} \
            --cluster-version ${{ matrix.version }} \
            --enable-ip-alias \
            --node-taints node.cilium.io/agent-not-ready=true:NoExecute \
            --cluster-ipv4-cidr="/21" \
            --services-ipv4-cidr="/24" \
            --image-type COS_CONTAINERD \
            --num-nodes 2 \
            --machine-type e2-custom-2-4096 \
            --disk-type pd-standard \
            --disk-size 20GB \
            ${{ env.preemptible }}

      - name: Get cluster credentials
        run: |
          gcloud container clusters get-credentials ${{ env.clusterName }} --zone ${{ matrix.zone }}

      # Warning: since this is a privileged workflow, subsequent workflow job
      # steps must take care not to execute untrusted code.
      - name: Checkout pull request branch (NOT TRUSTED)
        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
        with:
          ref: ${{ steps.vars.outputs.sha }}
          persist-credentials: false
          path: untrusted
          sparse-checkout: |
            install/kubernetes/cilium

      - name: Wait for images to be available
        timeout-minutes: 30
        shell: bash
        run: |
          for image in cilium-ci operator-generic-ci hubble-relay-ci clustermesh-apiserver-ci ; do
            until docker manifest inspect quay.io/${{ env.QUAY_ORGANIZATION_DEV }}/$image:${{ steps.vars.outputs.sha }} &> /dev/null; do sleep 45s; done
          done

      - name: Install Cilium in cluster
        id: install-cilium
        run: |
          cilium install ${{ steps.vars.outputs.cilium_install_defaults }}

      - name: Enable cluster mesh
        run: |
          cilium clustermesh enable ${{ steps.vars.outputs.clustermesh_enable_defaults }}

      - name: Wait for cluster mesh status to be ready
        run: |
          cilium clustermesh status --wait

      - name: Add VM to cluster mesh
        run: |
          cilium clustermesh vm create ${{ env.vmName }}-${{ matrix.vmIndex }} -n default --ipv4-alloc-cidr 10.192.1.0/30
          cilium clustermesh vm status

      - name: Install Cilium on VM
        run: |
          cilium clustermesh vm install install-external-workload.sh --config debug
          gcloud compute scp install-external-workload.sh ${{ env.vmName }}-${{ matrix.vmIndex }}:~/ --zone ${{ matrix.zone }}
          gcloud compute ssh ${{ env.vmName }}-${{ matrix.vmIndex }} --zone ${{ matrix.zone }} \
            --command "~/install-external-workload.sh"
          sleep 5s
          gcloud compute ssh ${{ env.vmName }}-${{ matrix.vmIndex }} --zone ${{ matrix.zone }} \
            --command "sudo cilium-dbg status"

      - name: Verify cluster DNS on VM
        # Limit nslookup to the first (global) DNS server setting
        run: |
          gcloud compute ssh ${{ env.vmName }}-${{ matrix.vmIndex }} --zone ${{ matrix.zone }} \
            --command "nslookup -d2 -retry=10 -timeout=5 -norecurse clustermesh-apiserver.kube-system.svc.cluster.local \$(systemd-resolve --status | grep -m 1 \"Current DNS Server:\" | cut -d':' -f2)"

      - name: Ping clustermesh-apiserver from VM
        run: |
          gcloud compute ssh ${{ env.vmName }}-${{ matrix.vmIndex }} --zone ${{ matrix.zone }} \
            --command "ping -c 3 \$(sudo cilium-dbg service list get -o jsonpath='{[?(@.spec.flags.name==\"clustermesh-apiserver\")].spec.backend-addresses[0].ip}')"

      - name: Make JUnit report directory
        run: |
          mkdir -p cilium-junits

      - name: Run connectivity test (${{ join(matrix.*, ', ') }})
        run: |
          cilium connectivity test ${{ steps.vars.outputs.connectivity_test_defaults }} \
          --junit-file "cilium-junits/${{ env.job_name }} (${{ join(matrix.*, ', ') }}).xml" \
          --junit-property github_job_step="Run connectivity test (${{ join(matrix.*, ', ') }})"

      - name: Post-test information gathering
        if: ${{ !success() && steps.install-cilium.outcome != 'skipped' }}
        run: |
          kubectl get pods --all-namespaces -o wide
          kubectl get cew --all-namespaces -o wide
          kubectl get cep --all-namespaces -o wide
          cilium status
          cilium clustermesh status
          cilium clustermesh vm status
          gcloud compute ssh ${{ env.vmName }}-${{ matrix.vmIndex }} --zone ${{ matrix.zone }} --command "sudo cilium status"
          gcloud compute ssh ${{ env.vmName }}-${{ matrix.vmIndex }} --zone ${{ matrix.zone }} --command "sudo docker logs cilium --timestamps"
          cilium sysdump --output-filename cilium-sysdump-final-${{ join(matrix.*, '-') }}
        shell: bash {0} # Disable default fail-fast behaviour so that all commands run independently

      - name: Clean up GKE cluster and VM
        if: ${{ always() }}
        run: |
          while [ "$(gcloud container operations list --zone ${{ matrix.zone }} --filter="status=RUNNING AND targetLink~${{ env.clusterName }}" --format="value(name)")" ];do
            echo "cluster has an ongoing operation, waiting for all operations to finish"; sleep 15
          done
          gcloud container clusters delete ${{ env.clusterName }} --zone ${{ matrix.zone }} --quiet --async
          gcloud compute instances delete ${{ env.vmName }}-${{ matrix.vmIndex }} --zone ${{ matrix.zone }} --quiet
        shell: bash {0} # Disable default fail-fast behavior so that all commands run independently

      - name: Upload artifacts
        if: ${{ !success() }}
        uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3 # v4.3.1
        with:
          name: cilium-sysdumps-${{ matrix.vmIndex }}
          path: cilium-sysdump-*.zip

      - name: Upload JUnits [junit]
        if: ${{ always() }}
        uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3 # v4.3.1
        with:
          name: cilium-junits-${{ matrix.vmIndex }}
          path: cilium-junits/*.xml

      - name: Publish Test Results As GitHub Summary
        if: ${{ always() }}
        uses: aanm/junit2md@332ebf0fddd34e91b03a832cfafaa826306558f9 # v0.0.3
        with:
          junit-directory: "cilium-junits"

  merge-upload:
    if: ${{ always() }}
    name: Merge and Upload Artifacts
    runs-on: ubuntu-latest
    needs: installation-and-connectivity
    steps:
      - name: Merge Sysdumps
        if: ${{ needs.installation-and-connectivity.result == 'failure' }}
        uses: actions/upload-artifact/merge@5d5d22a31266ced268874388b861e4b58bb5c2f3 # v4.3.1
        with:
          name: cilium-sysdumps
          pattern: cilium-sysdumps-*
          retention-days: 5
          delete-merged: true
        continue-on-error: true
      - name: Merge JUnits
        uses: actions/upload-artifact/merge@5d5d22a31266ced268874388b861e4b58bb5c2f3 # v4.3.1
        with:
          name: cilium-junits
          pattern: cilium-junits-*
          retention-days: 5
          delete-merged: true

  commit-status-final:
    if: ${{ always() }}
    name: Commit Status Final
    needs: installation-and-connectivity
    runs-on: ubuntu-latest
    steps:
      - name: Set final commit status
        uses: myrotvorets/set-commit-status-action@38f3f27c7d52fb381273e95542f07f0fba301307 # v2.0.0  
        with:
          sha: ${{ inputs.SHA || github.sha }}
          status: ${{ needs.installation-and-connectivity.result }}
